library(tidyverse)
## ── Attaching packages ─────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
library(ggplot2)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Read in wine data.
#' Extract the first four-digit year (19xx or 20xx) from each string.
#'
#' @param string Character vector to search (e.g. wine titles).
#' @return A numeric vector the same length as `string`; elements with no
#'   year-like substring are `NA`.
year_extract <- function(string) {
  # Alternation keeps the century to 19 or 20. A character class such as
  # `[9|0]` would also accept a literal "|" and let 10xx / 29xx through.
  hits <- regmatches(string, regexec("(19|20)[0-9]{2}", string))
  # regexec() reports the full match first and capture groups after it, so
  # only the first element is the year. vapply() pins the return type, which
  # keeps empty input as numeric(0) instead of list().
  vapply(
    hits,
    function(x) if (length(x) > 0) as.numeric(x[[1]]) else NA_real_,
    numeric(1)
  )
}
# Load the winemag CSV, convert column names with janitor::clean_names(),
# drop the unnamed first column (x1) plus the taster and region_2 columns,
# and add a `year` column parsed from each title.
wine_tidy_df <-
  "./wine_data/winemag-data-130k-v2.csv" %>%
  read_csv() %>%
  janitor::clean_names() %>%
  select(-c(x1, region_2, taster_name, taster_twitter_handle)) %>%
  mutate(year = year_extract(title))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## country = col_character(),
## description = col_character(),
## designation = col_character(),
## points = col_double(),
## price = col_double(),
## province = col_character(),
## region_1 = col_character(),
## region_2 = col_character(),
## taster_name = col_character(),
## taster_twitter_handle = col_character(),
## title = col_character(),
## variety = col_character(),
## winery = col_character()
## )
# Number of rows in the wine data per country, largest first.
# "US" is spelled out as "United States"; rows without a country are dropped.
# The result is not sent to the interactive viewer; run
# view(wine_by_country) by hand when exploring.
wine_by_country <- wine_tidy_df %>%
  mutate(country = recode(country, US = "United States")) %>%
  drop_na(country) %>%
  # count() on ungrouped input returns an ungrouped tibble, so later joins
  # and summaries are not silently evaluated per country.
  count(country, sort = TRUE, name = "total") %>%
  mutate(total = as.numeric(total))
# Number of US rows in the wine data per province, largest first, with the
# province column exposed as `state`.
# The result is not sent to the interactive viewer; run view(wine_us) by
# hand when exploring.
wine_us <- wine_tidy_df %>%
  filter(country == "US") %>%
  # count() on ungrouped input returns an ungrouped tibble, so later joins
  # and summaries are not silently evaluated per province.
  count(province, sort = TRUE, name = "total") %>%
  rename(state = province) %>%
  mutate(total = as.numeric(total))
# Fetch state geometries through tigris (cb = TRUE). The result reuses the
# name `states`; R still resolves later `states(...)` calls to the function
# because call lookup skips non-function objects.
states <- tigris::states(cb = TRUE)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
# Quick look at the raw state polygons on the default tiles; each shape's
# popup shows its NAME attribute.
leaflet(data = states) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
# Attach the per-state totals to the state geometries, matching the
# geometry's NAME column against wine_us$state.
states_merged_wine <- geo_join(
  spatial_data = states,
  data_frame = wine_us,
  by_sp = "NAME",
  by_df = "state"
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Choropleth of the per-state totals.
# NOTE(review): `total` comes from a per-province row count of the wine data,
# so the "Wineries" labels below may not describe what is counted -- confirm.

# Hand-picked class breaks for the colour scale.
mybins <- c(0, 100, 1000, 8000, 10000, 40000)
mypal <- colorBin(
  palette = "Purples",
  domain = states_merged_wine$total,
  bins = mybins,
  na.color = "transparent"
)

# Drop polygons whose total is NA before building popups and drawing.
states_merged_wine <- subset(states_merged_wine, !is.na(total))
popup <- paste0("Wineries: ", as.character(states_merged_wine$total))

leaflet(data = states_merged_wine) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    popup = ~popup,
    fillColor = ~mypal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2
  ) %>%
  addLegend(
    position = "bottomright",
    pal = mypal,
    values = states_merged_wine$total,
    title = "Wineries"
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
wine_by_country
## # A tibble: 43 x 2
## # Groups: country [43]
## country total
## <chr> <dbl>
## 1 United States 54504
## 2 France 22093
## 3 Italy 19540
## 4 Spain 6645
## 5 Portugal 5691
## 6 Chile 4472
## 7 Argentina 3800
## 8 Austria 3345
## 9 Australia 2329
## 10 Germany 2165
## # … with 33 more rows
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.1, released 2020/06/22
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/sf/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
# Read the TM_WORLD_BORDERS_SIMPL-0.3 layer from the world_shape_file folder
# under the working directory. file.path() assembles the directory path with
# the platform separator and without a trailing slash, which a hand-pasted
# "…/" path leaves in place and which can stop OGR from opening the folder
# on some platforms.
world_spdf <- readOGR(
  dsn = file.path(getwd(), "wine_data", "world_shape_file"),
  layer = "TM_WORLD_BORDERS_SIMPL-0.3",
  verbose = FALSE
)
# Quick look at the raw country polygons on the default tiles; each shape's
# popup shows its NAME attribute.
leaflet(data = world_spdf) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
# Attach the per-country totals to the world polygons, matching the
# polygon's NAME column against wine_by_country$country.
countries_merged_wine <- geo_join(
  spatial_data = world_spdf,
  data_frame = wine_by_country,
  by_sp = "NAME",
  by_df = "country"
)
# Choropleth of the per-country totals.
# NOTE(review): `total` comes from a per-country row count of the wine data,
# so the "Wineries" labels below may not describe what is counted -- confirm.

# Hand-picked class breaks for the colour scale.
world_bins <- c(0, 100, 1000, 10000, 20000, 30000, 60000)
world_pal <- colorBin(
  palette = "Reds",
  domain = countries_merged_wine$total,
  bins = world_bins,
  na.color = "transparent"
)

# Drop polygons whose total is NA before building popups and drawing.
countries_merged_wine <- subset(countries_merged_wine, !is.na(total))
world_popup <- paste0(
  "Wineries: ",
  as.character(countries_merged_wine$total)
)

# NOTE(review): the initial view uses the same centre and zoom as the US
# state map; confirm this is intended for a world map.
leaflet(data = countries_merged_wine) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    popup = ~world_popup,
    fillColor = ~world_pal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2
  ) %>%
  addLegend(
    position = "bottomright",
    pal = world_pal,
    values = countries_merged_wine$total,
    title = "Wineries"
  )